__author__ = "CCH陈常鸿"
'''
爬取新浪体育有关足球的新闻
沈：https://azsxx.vip:5560/luyifa/2016/0802/2260_10.html
果儿：https://azsxx.vip:5560/luyifa/2015/0630/1711_14.html
https://azsxx.vip:5560/luyifa/2019/0111/6397_14.html
https://azsxx.vip:5560/luyifa/2017/0306/3093_3.html
'''

import requests
from bs4 import BeautifulSoup

# Input file: one URL per line. Output file: scraped article text is appended.
# Raw strings fix the invalid "\p" / "\w" escape sequences (a SyntaxWarning on
# modern Python) while keeping the runtime path values byte-identical.
path_read = r'E:\pycharm\work/url_data.txt'
path_write = r'E:\pycharm\work/content_data.txt'
# Fetch article content
def getNews(url):
    """Fetch one news page and append each article paragraph to path_write.

    Selects "article.art_box > p.art_p" paragraphs and saves their text via
    WriteContent. Returns None; network/parse errors propagate to the caller.
    """
    # timeout prevents the scraper from hanging forever on a dead server
    wbdata = requests.get(url, timeout=10).text
    soup = BeautifulSoup(wbdata, 'lxml')
    paragraphs = soup.select("article.art_box > p.art_p")
    # iterate the tags directly — the original kept a redundant manual
    # counter (c) alongside the loop variable it never used
    for p in paragraphs:
        WriteContent(path_write, p.get_text())

# Read the URL list from a local file
def ReadUrl(path):
    """Read URLs (one per line) from *path* and fetch each via getNews.

    Strips the trailing newline the original passed through to requests,
    and skips blank lines instead of issuing an empty request.
    """
    with open(path, encoding='utf-8') as f:
        # iterate lazily instead of readlines() — no need to load whole file
        for line in f:
            url = line.strip()
            if url:
                getNews(url)

# Append content to the output file
def WriteContent(path, content):
    """Append *content* to the file at *path*.

    Explicit UTF-8 encoding: without it, Windows' locale codec (e.g. GBK)
    can raise UnicodeEncodeError on some scraped characters.
    """
    with open(path, "a", encoding="utf-8") as f:
        f.write(content)

# Collect hyperlinks from a page
def getURL(URL):
    """Fetch *URL*, print every <a> tag's href, and return them as a list.

    The original kept a counter (o) that was incremented but never read;
    it is removed. Returning the list is backward-compatible (callers that
    ignored the old None return still work) and makes the links reusable.
    """
    # timeout prevents the scraper from hanging forever on a dead server
    wbdata = requests.get(URL, timeout=10).text
    soup = BeautifulSoup(wbdata, 'html.parser')
    hrefs = []
    for tag in soup.find_all('a'):
        href = tag.get('href')
        print(href)
        hrefs.append(href)
    return hrefs